/*==================================================
Project:       Targeting Social Programs
Authors:       Diether W. Beuermann
               Bridget Hoffmann        
               Marco Stampini 
               David L. Vargas
               Diego Vera-Cossio
----------------------------------------------------
Creation Date:    Sep 2024
References:          
==================================================*/

/* This is a Data Master Do file */

/*
Running this file will replicate all the data cleaning and preparation for the analysis.

Admin data has restricted access, so you will not be able to run the whole code, 
and some sections will be skipped. Because of this, you will not be able to fully replicate the 
datasets used in the analysis; however, you will get a close replica of the ones used in the paper.
An anonymised version of the datasets used in the analysis is provided in the replication package, 
so you can still run all the analysis codes (except for the admin data table A1).

Some advice:
        1. Although you can run the whole code in one go, we recommend that you run it
        exhibit by exhibit, as it may take a long time.
        2. Make sure to change all the paths where relevant in the program set section.
        3. Make sure you keep the folder structure of the replication package when running 
        to avoid errors.
        4. Although maintaining the do-files order is not strictly necessary, it is strongly 
        recommended.
*/


/*==================================================
            0: Program set up
==================================================*/
*Written on STATA 17
*set varabbrev off	// no variable abbreviations allowed (personal preference)

// Additional packages (uncomment and run once if they are not installed yet)
*ssc install gtools
*net install cleanplots, from("https://tdmize.github.io/data/cleanplots")
*set scheme cleanplots, perm
*ssc install icw_index
*ssc install outreg2
*ssc install winsor2
*ssc install binscatter
*ssc install elabel

// Start from a clean session. -clear all- already drops the data in memory,
// so a separate -drop _all- is not needed.
clear all

// Folder root: chosen according to the user running the code.
// Forward slashes are used on every platform: Stata for Windows accepts them,
// and they avoid placing a backslash right before a macro reference, where the
// backslash acts as an escape character and can block the macro expansion.
// Replicators: edit the last branch so that it points to your own root folder.
if "`c(username)'" == "davidvargas"			global root "/Users/davidvargas/Dropbox/Research/BID"
else if inlist("`c(username)'", "David" , "DAVIDVAR" )	global root "C:/Users/`c(username)'/Dropbox/Research/BID"
else 							global root "D:/Dropbox"

// Paths (all derived from the root; keep the replication package folder structure)
global dir1r "$root/Targeting_project/11_replication"
global dir2r "$dir1r/02_Data/02_Rawdata"
global dir3r "$dir1r/02_Data/03_Cleandata"
global dir4r "$dir1r/03_Output/01_Tables"
global dir5r "$dir1r/03_Output/02_Graphs"
global dir6r "$dir1r/01_Codes"
global dir7r "$dir1r/02_Data/01_Codes"
*global dir8r "/Volumes/Targeting_01_encrypted" // encrypted files root

// Fail early, with a clear message, when the folder holding the data do-files
// cannot be found (typically because the root above was not adapted).
mata: st_local("dir7r_found", strofreal(direxists(st_global("dir7r"))))
if "`dir7r_found'" != "1" {
	display as error "Data code folder not found: $dir7r"
	display as error "Edit the global root in section 0 so that it points to your copy of the replication package."
	exit 601
}

/*==================================================
        1. Admin Data
==================================================*/

/*----------------------------------------------------------------------
       -------------------- ADMIN DATA 	-----------------------
------------------------------------------------------------------------

***** Admin data has restricted access. ******

** Basic Admin data (SISBEN) Clean Up

*----------  1.1 Data 1: Data import and sampling
* do "${dir7r}/D1_SISBEN_sampling.do" 
/* Comment: This loads the restricted original data, 
changes the data format and compresses it. It also creates 
a subsample containing only the families in the survey 
sampling frame */

*----------  1.2 Data 2: Admin Data clean up
*---- 1.2.A Data 2A: Clean up for survey framing sample 
* do "${dir7r}/D2_SISBEN_cleanup.do" 
/* Comment: This Cleans up admin data for the framing sample */

*---- 1.2.B Data 2B: Whole data clean up
* do "${dir7r}/D2B_SISBEN_universe_cleanup.do" 
/* Comment: This Cleans up admin data */

----------------------------------------------------------------------
    -------------------- ADMIN DATA END -----------------------
-----------------------------------------------------------------------*/

/*==================================================
        2. Survey Data
==================================================*/

** Basic clean up of the survey data (the three steps run in this order)

*----------  2.1 Data 3: Clean up of the main survey variables
do "$dir7r/D3_survey_main_cleaning.do"

*----------  2.2 Data 4: Clean up of the additional module on asset ownership
do "$dir7r/D4_survey_assets_losses_cleaning.do"

*----------  2.3 Data 5: Reshape to long format
do "$dir7r/D5_shape_yearly.do"

/*==================================================
        3. Auxiliary Data
==================================================*/

** Basic clean up of the auxiliary data

*----------  3.1 Data 6: Clean up of Colombia's flagship recurrent employment survey (GEIH)
do "$dir7r/D6_GEIH_cleanning.do"

/*==================================================
        4. PMT additional variables
==================================================*/

** Additional clean up in preparation for the analysis
/*----------------------------------------------------------------------
	-------------------- ADMIN DATA 	-----------------------
------------------------------------------------------------------------

***** Admin data has restricted access, so this step is disabled. ******
*----------  4.1 Data 7: Additional cleaning of the ADMIN data for the PMT
* do "${dir7r}/D7_PMT_SISBEN_prep.do"

----------------------------------------------------------------------
	-------------------- ADMIN DATA END 	-----------------------
------------------------------------------------------------------------*/

*----------  4.2 Data 8: Additional cleaning of the survey data for the PMT
do "$dir7r/D8_PMT_srv_prep.do"
